library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidyverse)
health_full <- read_csv("https://chronicdata.cdc.gov/api/views/swc5-untb/rows.csv?accessType=DOWNLOAD&bom=true&format=true")
## Rows: 176008 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): StateAbbr, StateDesc, LocationName, DataSource, Category, Measure,...
## dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Lat...
## lgl  (2): Data_Value_Footnote_Symbol, Data_Value_Footnote
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(health_full)
## # A tibble: 6 × 23
##    Year StateAbbr StateDesc     LocationName DataSource Category   Measure      
##   <dbl> <chr>     <chr>         <chr>        <chr>      <chr>      <chr>        
## 1  2018 US        United States <NA>         BRFSS      Prevention Current lack…
## 2  2018 AL        Alabama       Bibb         BRFSS      Health Ou… Cancer (excl…
## 3  2018 AL        Alabama       Bullock      BRFSS      Health Ou… Cancer (excl…
## 4  2018 AL        Alabama       Choctaw      BRFSS      Health Ou… Current asth…
## 5  2018 AL        Alabama       Cleburne     BRFSS      Unhealthy… Binge drinki…
## 6  2018 AL        Alabama       Cleburne     BRFSS      Unhealthy… Obesity amon…
## # … with 16 more variables: Data_Value_Unit <chr>, Data_Value_Type <chr>,
## #   Data_Value <dbl>, Data_Value_Footnote_Symbol <lgl>,
## #   Data_Value_Footnote <lgl>, Low_Confidence_Limit <dbl>,
## #   High_Confidence_Limit <dbl>, TotalPopulation <dbl>, Latitude <dbl>,
## #   Longitude <dbl>, LocationID <chr>, CategoryID <chr>, MeasureId <chr>,
## #   DataValueTypeID <chr>, Short_Question_Text <chr>, geolocation <chr>
year <- 2017:2020
quarters <- 1:4
type <- "Electric"

pge_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "pge/PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_elec <- rbind(pge_elec,temp)
  # Note rbind requires field names to be consistent for every new thing that you add.

  saveRDS(pge_elec, "pge_elec.rds")
}
## [1] "pge/PGE_2017_Q1_ElectricUsageByZip.csv"
## [2] "pge/PGE_2018_Q1_ElectricUsageByZip.csv"
## [3] "pge/PGE_2019_Q1_ElectricUsageByZip.csv"
## [4] "pge/PGE_2020_Q1_ElectricUsageByZip.csv"
## Rows: 31363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "pge/PGE_2017_Q2_ElectricUsageByZip.csv"
## [2] "pge/PGE_2018_Q2_ElectricUsageByZip.csv"
## [3] "pge/PGE_2019_Q2_ElectricUsageByZip.csv"
## [4] "pge/PGE_2020_Q2_ElectricUsageByZip.csv"
## Rows: 31377 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "pge/PGE_2017_Q3_ElectricUsageByZip.csv"
## [2] "pge/PGE_2018_Q3_ElectricUsageByZip.csv"
## [3] "pge/PGE_2019_Q3_ElectricUsageByZip.csv"
## [4] "pge/PGE_2020_Q3_ElectricUsageByZip.csv"
## Rows: 31394 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "pge/PGE_2017_Q4_ElectricUsageByZip.csv"
## [2] "pge/PGE_2018_Q4_ElectricUsageByZip.csv"
## [3] "pge/PGE_2019_Q4_ElectricUsageByZip.csv"
## [4] "pge/PGE_2020_Q4_ElectricUsageByZip.csv"
## Rows: 33975 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pge_filter <- filter(pge_elec, CUSTOMERCLASS %in% c("Elec- Residential","Elec- Commercial"))


names(pge_filter)
## [1] "ZIPCODE"        "MONTH"          "YEAR"           "CUSTOMERCLASS" 
## [5] "COMBINED"       "TOTALCUSTOMERS" "TOTALKWH"       "AVERAGEKWH"
head(pge_filter)
## # A tibble: 6 × 8
##   ZIPCODE MONTH  YEAR CUSTOMERCLASS    COMBINED TOTALCUSTOMERS TOTALKWH AVERAGEKWH
##     <dbl> <dbl> <dbl> <chr>            <chr>             <dbl>    <dbl>      <dbl>
## 1   93101     1  2017 Elec- Commercial Y                     0        0         NA
## 2   93101     2  2017 Elec- Commercial Y                     0        0         NA
## 3   93101     3  2017 Elec- Commercial Y                     0        0         NA
## 4   93105     1  2017 Elec- Commercial Y                     0        0         NA
## 5   93105     2  2017 Elec- Commercial Y                     0        0         NA
## 6   93105     3  2017 Elec- Commercial Y                     0        0         NA
pge_select <-
  select(
    pge_filter,
  )
pge_select <-
  select(
    pge_filter,
    -c(COMBINED, AVERAGEKWH)
  )


pge_group <-
  group_by(
    pge_select,
    YEAR,
    MONTH,
    CUSTOMERCLASS
  )


pge_summarize <-
  summarize(
    pge_group,
    TOTALKWH = 
      sum(
        TOTALKWH, 
        na.rm = T
      )
  )
## `summarise()` has grouped output by 'YEAR', 'MONTH'. You can override using the `.groups` argument.
pge_wide <-
  pivot_wider(
    pge_summarize,
    names_from = CUSTOMERCLASS,
    values_from = TOTALKWH
  )


pge_wide
## # A tibble: 48 × 4
## # Groups:   YEAR, MONTH [48]
##     YEAR MONTH `Elec- Commercial` `Elec- Residential`
##    <dbl> <dbl>              <dbl>               <dbl>
##  1  2017     1         2518133184          2696632298
##  2  2017     2         2186409895          2089405296
##  3  2017     3         2328765919          2007875145
##  4  2017     4         2260528589          1838107707
##  5  2017     5         2472760712          2070656073
##  6  2017     6         2579187737          2615607826
##  7  2017     7         2808792813          3113124827
##  8  2017     8         2925396026          2995761408
##  9  2017     9         5462119756          5072659238
## 10  2017    10         2739510203          1956786021
## # … with 38 more rows
pge_tidy <-
  pivot_longer(
    pge_wide,
    c("Elec- Commercial", "Elec- Residential"),
    names_to = "CUSTOMERCLASS",
    values_to = "TOTALKWH"
  )

pge_tidy
## # A tibble: 96 × 4
## # Groups:   YEAR, MONTH [48]
##     YEAR MONTH CUSTOMERCLASS       TOTALKWH
##    <dbl> <dbl> <chr>                  <dbl>
##  1  2017     1 Elec- Commercial  2518133184
##  2  2017     1 Elec- Residential 2696632298
##  3  2017     2 Elec- Commercial  2186409895
##  4  2017     2 Elec- Residential 2089405296
##  5  2017     3 Elec- Commercial  2328765919
##  6  2017     3 Elec- Residential 2007875145
##  7  2017     4 Elec- Commercial  2260528589
##  8  2017     4 Elec- Residential 1838107707
##  9  2017     5 Elec- Commercial  2472760712
## 10  2017     5 Elec- Residential 2070656073
## # … with 86 more rows
pge_summarize <-
  summarize(
    pge_group,
    TOTALKWH = 
      sum(
        TOTALKWH, 
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  )
## `summarise()` has grouped output by 'YEAR', 'MONTH'. You can override using the `.groups` argument.
pge_mutate <-
  mutate(
    pge_summarize,
    AVERAGEKWH =
      TOTALKWH/TOTALCUSTOMERS
  ) %>%
  mutate(
    DATE2 = 
      paste(
        YEAR,
        MONTH, 
        "01",
        sep="-"
      ) %>% as.Date()
  )


pge_mutate
## # A tibble: 96 × 7
## # Groups:   YEAR, MONTH [48]
##     YEAR MONTH CUSTOMERCLASS       TOTALKWH TOTALCUSTOMERS AVERAGEKWH DATE2     
##    <dbl> <dbl> <chr>                  <dbl>          <dbl>      <dbl> <date>    
##  1  2017     1 Elec- Commercial  2518133184         312242      8065. 2017-01-01
##  2  2017     1 Elec- Residential 2696632298        4427009       609. 2017-01-01
##  3  2017     2 Elec- Commercial  2186409895         312327      7000. 2017-02-01
##  4  2017     2 Elec- Residential 2089405296        4429320       472. 2017-02-01
##  5  2017     3 Elec- Commercial  2328765919         312586      7450. 2017-03-01
##  6  2017     3 Elec- Residential 2007875145        4432096       453. 2017-03-01
##  7  2017     4 Elec- Commercial  2260528589         313499      7211. 2017-04-01
##  8  2017     4 Elec- Residential 1838107707        4437095       414. 2017-04-01
##  9  2017     5 Elec- Commercial  2472760712         313795      7880. 2017-05-01
## 10  2017     5 Elec- Residential 2070656073        4438946       466. 2017-05-01
## # … with 86 more rows
# piping practice
pge_final_elec <-
  pge_elec %>% 
  filter(
    CUSTOMERCLASS %in% 
      c("Elec- Residential", "Elec- Commercial")
  ) %>% 
  mutate(
    DATE = 
      paste(
        YEAR,
        MONTH, 
        "01",
        sep="-"
      ) %>% as.Date()
  ) %>% 
  group_by(DATE, CUSTOMERCLASS) %>% 
    select(
      -c(YEAR, MONTH, COMBINED, AVERAGEKWH)
    ) %>%
  summarize(
    TOTALKBTU = 
      sum(
        3.41214*TOTALKWH, 
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  ) %>% 
  mutate(
    AVERAGEKBTU =
      TOTALKBTU/TOTALCUSTOMERS)
## `summarise()` has grouped output by 'DATE'. You can override using the `.groups` argument.
pge_final_elec
## # A tibble: 96 × 5
## # Groups:   DATE [48]
##    DATE       CUSTOMERCLASS       TOTALKBTU TOTALCUSTOMERS AVERAGEKBTU
##    <date>     <chr>                   <dbl>          <dbl>       <dbl>
##  1 2017-01-01 Elec- Commercial  8592222962.         312242      27518.
##  2 2017-01-01 Elec- Residential 9201286929.        4427009       2078.
##  3 2017-02-01 Elec- Commercial  7460336659.         312327      23886.
##  4 2017-02-01 Elec- Residential 7129343387.        4429320       1610.
##  5 2017-03-01 Elec- Commercial  7946075343.         312586      25420.
##  6 2017-03-01 Elec- Residential 6851151097.        4432096       1546.
##  7 2017-04-01 Elec- Commercial  7713240020.         313499      24604.
##  8 2017-04-01 Elec- Residential 6271880831.        4437095       1414.
##  9 2017-05-01 Elec- Commercial  8437405736.         313795      26888.
## 10 2017-05-01 Elec- Residential 7065368413.        4438946       1592.
## # … with 86 more rows
library(tidyverse)
library(plotly)

pge_chart_elec <-
  pge_final_elec %>% 
  ggplot() +
  geom_bar(
    aes(
      x = DATE,
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBTU",
    title = "PG&E Territory Monthly Electricity Usage, 2017-2020",
    fill = "Electricity Type"
  )

pge_chart_elec

pge_chart_elec %>% ggplotly()
pge_chart_elec %>% 
  ggplotly() %>% 
  layout(
    xaxis = list(fixedrange = T),
    yaxis = list(fixedrange = T)
  ) %>% 
  config(displayModeBar = F)